
* Session setup: empty the data in memory, turn off output paging, and close
* any log left open by an earlier run (capture suppresses the error if none).
clear
set more off
capture log close
* fixed seed so any random-number draws are reproducible
set seed 12345
* raise matrix-size and variable-count limits
set matsize 5000
set maxvar 20000
* Fail fast when the project root global is missing. Without this check the
* paths below would resolve relative to the filesystem root and the cd below
* would not move to the project folder.
if `"$DIR"' == "" {
	display as error "global DIR is not defined; set it to the project root before running this do-file"
	exit 198
}
* set globals
global OUTPUT "$DIR/Output"
global DATA "$DIR"
cd "$DIR"

/* ANALYSIS SETTINGS (locals) */
* first and last tax year retained in the panel
local yrb 2008
local yre 2016
* base year: omitted category for event time and reference year for controls
local baseyr 2012
* outcome transformation prefix: "ln" for logs, "level" for levels
local rlog ln
local rloglab `rlog'

* year-window suffix of the sample flags used for the unbalanced panel
local yrsamp 0812
* firm-size sample flag (exactly one)
local sizesamp 1ft
* sample-restriction flags (any number); each is applied as <flag>_<yrsamp>==1
*   fsamp_1120s    : active (1120s)
*   fsamp_annual   : annual filers
*   fsamp_merge_k1 : firm matches w/ K-1
*   fsamp_clean    : income on K-1s matches netinc
local samprests fsamp_1120s fsamp_annual fsamp_merge_k1 fsamp_clean
* firm-level variables kept as controls
local firmvar naics netinc revenue totalincome valadd totalded wages_1120s ofcomp


*********************************************
**  Prep local labor mkt stats
*******************************************
* Load the ZIP-level lookup file. The path is quoted so that a project
* directory containing spaces still resolves (matches the quoted paths used
* for the globals and for esttab).
use "$DATA/zip_cz_variables_2012", clear
* diagnostic: tabulate first-vs-repeat observations within zip_str to show
* whether zip_str has duplicates
bys zip_str: gen nid=_n==1
tab nid
drop nid
* rename so it does not clash with the encoded firm state variable built later
ren state statename
* zip is the key used for the m:1 merge onto the firm panel
sort zip
tempfile tzip
save `tzip', replace

*******************************
**  Prep firm variables  **
*******************************
* Load the firm panel. The path is quoted so a project directory containing
* spaces still resolves.
use "$DATA/scorp100p_analysis_sample", clear
* keep only the tax years in the analysis window
keep if year>=`yrb' & year<=`yre'
keep firm_tin year n_ee n_fulltime n_ee_top n_ee_lower n_ft_lower tot_wages tot_wages_ft n_own n_own_top state_firm fsamp* `firmvar' zip_firm

* Keep only firms that fit every sample restriction for the chosen window
foreach r in `samprests' {
	keep if `r'_`yrsamp'==1
}
tab year
* firmsize restriction
keep if fsamp_`sizesamp'_`yrsamp'==1
* the sample flags are not needed once the restrictions are applied
drop fsamp*
tab year

* prep average wages
* negative counts are set to zero; dividing by zero then yields missing
replace n_ft_lower = 0 if n_ft_lower<0
gen wages = tot_wages / n_ee_lower
gen wages_ft = tot_wages_ft / n_ft_lower
* diagnostic: number of observations without an average wage
count if wages==.

* value-added per worker
gen valaddpw_ft = valadd/n_fulltime
gen valaddpw = valadd/n_ee
* fall back to the all-employee measure when the full-time measure is missing
replace valaddpw_ft = valaddpw if valaddpw_ft==.

* prep state variable for regressions (numeric coding of the string state)
encode state_firm, gen(state)
drop state_firm

**********************************
**  Create Treatment Variables  **
**********************************
* t1: treated = firm has at least one top owner (0 when the count is missing)
gen t1 = (n_own_top >= 1) & (n_own_top != .)
tab t1

* t2: treated = every owner is a top owner; control = no top owner.
* Firms with some top owners that do not meet the all-owner condition stay missing.
gen t2 = 0 if t1 != 1
replace t2 = 1 if (n_own == n_own_top) & (n_own != .)
tab t2

/* Spread each firm's 2012 value of selected variables to all of its rows */
local vs "t1 t2 netinc valaddpw_ft naics state n_ft_lower"
sort firm_tin year
foreach bv of local vs {
	* holds the variable in 2012 rows only; missing in every other year
	gen z12 = `bv' if year==2012
	* the firm-level max picks up that 2012 value (missing if there is none)
	egen `bv'_2012 = max(z12), by(firm_tin)
	drop z12
}

/* Decile categories of the 2012 production and owner-income variables */
local pinc "netinc_2012 valaddpw_ft_2012" 
foreach src of local pinc {
	xtile `src'cat = `src', nq(10)
}

* get baseline CZ's
* numeric ZIP key matching the lookup file
destring zip_firm, g(zip)
sort zip
merge m:1 zip using `tzip'
* diagnostics: match status by year and by state
tab year _merge
tab state _merge
*tab zip_firm if _merge==1
* drop lookup ZIPs that match no firm; unmatched firm rows are kept
drop if _merge==2
gen flag_cz_merge=_merge==3
drop _merge
* number of firm-years with a successful ZIP match
bys firm_tin: egen zz=sum(flag_cz_merge)
tab zz
* Flag firms matched in every year of the analysis window. The window length
* is derived from the year locals instead of a hard-coded 9, so the flag stays
* correct if the window changes.
local nyrs = `yre' - `yrb' + 1
gen fsamp_cz_merge = zz== `nyrs'
drop zz
* Spread the 2012 commuting zone to all rows of the firm. Because the total
* treats missing as zero, firms without a 2012 row or without a 2012 match
* get cz_2012 equal to 0, not missing.
foreach v in cz {
	gen zz = 0
	replace zz = `v' if year==2012
	bys firm_tin: egen `v'_2012=sum(zz)
	drop zz
}

* event-study building blocks
* calendar-year indicators y2008 ... y2016
forvalues yr = `yrb'/`yre' {
	gen y`yr' = (year == `yr')
}
* placeholder treatment-by-year variables; filled in before the regressions
forvalues yr = `yrb'/`yre' {
	gen t`yr' = 0
}

* log average wages (all lower-paid employees, and full-time only)
gen lnwages = log(wages)
gen lnwages_ft = log(wages_ft)

* treatment status in additional base years
* spread each firm's value of the treatment variable in the listed year(s)
foreach tv in t1 {
	foreach b in 2011 {
		gen vv = `tv' if year == `b'
		bys firm_tin: egen `tv'_`b' = max(vv)
		drop vv
	}
}
* tt1112: 1 if treated in both 2011 and 2012, 0 if untreated in both,
* missing when status differs across the two years or either year is missing
gen tt1112 = t1_2011 if (t1_2011 == t1_2012) & inlist(t1_2011, 0, 1)

*******************************************
** regressions
*******************************************
* years adjacent to the base year
local bm1 = `baseyr' - 1
local bp1 = `baseyr' + 1
* year factor with the base year as the omitted category
local yrfx "ib`baseyr'.year"
* individual base-year control terms, each fully interacted with year
local c_netinc "i.netinc_`baseyr'cat##`yrfx'"
local c_valadd "i.valaddpw_ft_`baseyr'cat##`yrfx'"
local c_naics "i.naics_`baseyr'##`yrfx'"
local c_cz "i.cz_`baseyr'##`yrfx'"
* control sets r1-r4: each specification adds one term to the previous one
local r1 "`c_netinc'"
local r2 "`c_valadd' `c_netinc'"
local r3 "`r2' `c_naics'"
local r4 "`r3' `c_cz'"

**********************
**  Regression results 
**	- Table 2 of main text
**********************
* treatment definition used for this table
local t "tt1112"
* Rebuild the treatment-by-year terms: missing when treatment status is
* missing, equal to the treatment indicator in the matching year, 0 otherwise.
* Dropping and regenerating places t2008 ... t2016 contiguously at the end of
* the dataset, which the variable ranges below rely on.
forval y = `yrb' / `yre' {
	drop t`y'
	gen t`y' = cond(`t'==., ., cond(year==`y', `t', 0))
}
* base 2011,2012
local events "t`yrb'-t2010 t`bp1'-t`yre' y`yrb'-y2010 y`bp1'-y`yre' `t'"

foreach o in wages_ft {
	* one regression per control set r1-r4, weighted by 2012 employment and
	* restricted to firms with defined treatment status in 2011, 2012 and 2016
	forvalues r = 1/4 {
		local ctrl `r`r''
		eststo r`r': quietly reghdfe `rlog'`o' `events' [aw=n_ft_lower_2012] ///
			if inlist(`t', 0, 1) & inlist(year, 2011, 2012, 2016), ///
			absorb(firm_tin `ctrl') vce(cluster firm_tin)
	}
	* export only the 2016 treatment coefficient from each specification
	esttab using "$OUTPUT/table2_panelB_unbal.csv", replace se ar2 mtitles( ) keep(t2016) addnote("Each specification includes a different set of control variabels")
	eststo clear
}


***********************
**  Exit rates by t v c
**	- Table A.2
***********************
* For each treatment definition (t1, t2): count firm observations per year by
* 2012 treatment status, then express each year's treated and control counts
* relative to the corresponding 2012 count.
* Changes relative to the earlier two copy-pasted passes:
*   - full variable names (t1_2012, t2_2012) are used after collapse, so the
*     code no longer depends on variable-name abbreviation being enabled;
*   - rows with missing 2012 status are dropped in both passes (previously
*     only for t2), so each year contributes exactly one output row;
*   - the output path is quoted so directories with spaces work.
foreach g in t1 t2 {
	preserve
		collapse (count) n=firm_tin, by(year `g'_2012)
		list
		* firms without a defined 2012 status belong to neither group
		drop if `g'_2012==.
		* copy the treated count onto every row of the same year
		gen zz = 0
		replace zz = n if `g'_2012==1
		bys year: egen n_treat = sum(zz)
		drop zz
		ren n n_ctrl
		sort `g'_2012 year
		list
		* keep the control rows, which now carry both counts
		drop if `g'_2012==1

		* rate = count in a given year divided by the 2012 count
		foreach j in treat ctrl {
			gen zz=0
			replace zz = n_`j' if year==2012
			egen zz7 = sum(zz)
			gen `j'_rate = n_`j'/zz7
			drop zz zz7
		}
		list
		drop `g'_2012
		* label the rows with the treatment definition they belong to
		gen treat = "`g'"
		* tempfile handles are named tt1 and tt2
		tempfile t`g'
		save `t`g'', replace
	restore
}

* stack the two definitions and export
preserve
	use `tt1', clear
	append using `tt2'
	outsheet using "$OUTPUT/table_A2.csv", comma replace
restore

clear
exit




